%% Section 0: Description of the file

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%% Explanation of what the file does
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% 
% This file takes as input data created by other files and uses it to 
% construct other things needed by the theoretical model
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%% Input files needed
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% 
% InputData.xlsx
% 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%% Output files produced
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% 
% ProcessedData.xlsx
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%% Matlab functions invoked
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% 
% NEBA_PriceSolver
% NEBA_RevenueSolver

%% Section 1: Clean trade matrix to avoid negative Cobb-Douglas shares

clear all
clc

% Define some useful parameters

S           = 14;
I           = 87;
M           = 50;
sigma       = 6;

% Import data

dataimp     = xlsread('InputData.xlsx','BILATRAW','C2:CK1219');
datarea     = reshape(dataimp',[I,I,S]);
labshares   = xlsread('InputData.xlsx','VA','C2:P88');
inoutcdp    = xlsread('InputData','IO','D2:Q533');
inouttemp1  = permute(reshape(inoutcdp',[S,S,I-M+1]),[2,1,3]);
inouttemp2  = NaN(S,S,I);
inouttemp2(:,:,1:M) = repmat(inouttemp1(:,:,1),1,1,M);
inouttemp2(:,:,M+1:end) = inouttemp1(:,:,2:end);
inouttemp3  = permute(inouttemp2,[3,1,2]);
inoutmat    = inouttemp3 .* repmat(permute(1-labshares,[1,3,2]),1,S,1);

% Define trade shares, labor income, deficits, final cons. shares, etc

tradesharedat = datarea./repmat(sum(datarea,1),I,1,1);
expbysec    = squeeze(sum(datarea,1));
revbysec    = squeeze(sum(datarea,2));
laborincsec = labshares .* revbysec;
deficits    = sum(expbysec,2)-sum(revbysec,2);

% Define the original alphas which are compatible with the data being 
% an equilibrium in the first period if there are no shocks.

alphasinter = repmat(permute(revbysec,[1,3,2]),1,S,1);
alphasregnum = expbysec - sum(inoutmat .* alphasinter,3);
alphasreg   = alphasregnum./repmat(sum(alphasregnum,2),1,S);

% Boxplot these original alphas

figure(1)
boxplot(alphasreg)
title('Box Plot of Alphas Across Sectors')

% Now define a new set of alphas that set negative alphas to zero and 
% readjust the remaining alphas to add up to one

cutoff      = 0.0001;
posalphas   = alphasreg.*(alphasreg>cutoff)+cutoff.*(alphasreg<cutoff);
alphasfix   = posalphas./repmat(sum(posalphas,2),1,S);

% So now we have two sets of alphas:
% 1. The ones that make the original data an equilibrium without shocks 
% (some of which are negative), alphasreg; 
% 2. The ones that fix the negative alphas, alphasfix;
% First lets compare them to get a sense of the magnitude of the change 
% we will effect on the data

corr([alphasreg(:),alphasfix(:)])

% Now plot the alphas

figure(2)
scatter(alphasreg(:),alphasfix(:))
title('Scatter of Original Alphas vs Positive Alphas')

% Now we will pick new alphas (original, fixed ones, or cdp ones), 
% and adjust the data (i.e., bilateral trade flows), so that with 
% the new alphas the fixed data is an equilibrium in the first period.

targetalphas   = alphasfix;

% Define new alphas and some other options for solver

Amatrix     = targetalphas;
Smatrix     = ones(1,S)*sigma;
maxiter     = 2000;
tolerance   = 1e-5;
lambda      = 0.2;

% With the new alphas we are able to obtain the new data.

cd ..
[EqWages,EqLabor,EqPrices,EqShares,EqRev] = NEBA_PeriodTatoSolver(...
tradesharedat,ones(I,I,S),ones(I,S),labshares,inoutmat,Smatrix,...
Amatrix,laborincsec,ones(I,S),0.01*ones(I,S),deficits,maxiter,...
tolerance,lambda);

% Now generate the new data

NewIncome   = sum(EqWages .* laborincsec,2);
expjs       = targetalphas .* repmat(NewIncome+deficits,1,S) + ...
              sum(inoutmat.*repmat(permute(EqRev,[1,3,2]),1,S,1),3);
newdata     = EqShares .* repmat(permute(expjs,[3,1,2]),I,1,1);

% Compare both sets of data and save new data

corr(newdata(:),datarea(:))
figure(3)
scatter(newdata(:),datarea(:))
title('Scatter of Old Data vs New Data')

% Compute alphas in new data, hopefully close to target ones

tradesharedat = newdata./repmat(sum(newdata,1),I,1,1);
expbysec    = squeeze(sum(newdata,1));
revbysec    = squeeze(sum(newdata,2));
laborincsec = labshares .* revbysec;
deficits    = sum(expbysec,2)-sum(revbysec,2);
alphasinter = repmat(permute(revbysec,[1,3,2]),1,S,1);
alphasregnum = expbysec - sum(inoutmat .* alphasinter,3);
newalphas   = alphasregnum./repmat(sum(alphasregnum,2),1,S);

corr([newalphas(:),targetalphas(:),alphasreg(:)])
figure(4)
subplot(1,2,1)
scatter(newalphas(:),targetalphas(:))
title('Scatter of New Alphas vs Target Alphas')
subplot(1,2,2)    
scatter(newalphas(:),alphasreg(:))
title('Scatter of New Alphas vs Original Alphas')

% Write new data, by putting it in the same format as the original data

datactm     = reshape(newdata,I,I*S,1)';

corr(datactm(:),dataimp(:))
figure(5)
scatter(datactm(:),dataimp(:))
title('Scatter of Old Data vs New Data')

cd Inputs/
xlswrite('ProcessedData.xlsx',datactm,'BIFIXED','C2:CK1219')

%% Section 2: Make migration matrices compatible with levels across years

clear all
clc

% First import labor in 2000 from the census

LCA0 = xlsread('InputData.xlsx','L2000CENSUS','B2:P88');
LCA1 = LCA0(1:50,:)';
LCA2 = LCA1(:);

% Second import labor in 2000 from the BLS

LBA0 = xlsread('InputData.xlsx','L2000BLS','B2:P88');
LBA1 = LBA0(1:50,:)';
LBA2 = LBA1(:);

% Third import labor in 1999 from the BLS
% We make it sum to the same as the 2000 BLS vector since our model has no
% population growth at the U.S. aggregate level

LBB0 = xlsread('InputData.xlsx','L1999BLS','B2:P88');
LBB1 = LBB0(1:50,:)';
LBB2 = LBB1(:);
LBB3 = LBB2/sum(LBB2)*sum(LBA2);

% Fourth compute ratios between 1999 and 2000 in BLS data

RAT  = LBB3./LBA2;

% Sets NANs to one, these are only the sectors that have zero employment in
% 2000 BLS data but they also all have zero employment in 1999 BLS data
% so this does not matter. They do not have zero employment in census data

RAT(isnan(RAT))=1;

% Then winsorize 2.5% of values on both sides

LB = prctile(RAT,2.5);
UB = prctile(RAT,97.5);
RAT(RAT<LB) = LB;
RAT(RAT>UB) = UB;

% Then multiply RAT by the 2000 census labor vector and renormalize so that
% they have the same sum

LCB1 = RAT .* LCA2;
LCB2 = LCB1/sum(LCB1)*sum(LCA2);

% Now compute migrations 

migra = (LCA2./LCB2-1)*100; %#ok<*NASGU>

% Now do the procedure to make the initial mu compatible with
% the "census" labor vectors in 1999 and 2000

mu      = xlsread('InputData.xlsx','MURAW','D2:ABY751');
snum    = size(LCA1,1);
rnum    = size(LCA1,2);
S       = size(mu,1);
mut     = mu';
muv     = mut(:);

options = optimoptions('quadprog','Display','iter','TolCon',1e-10,...
          'TolFun',1e-10,'TolX',1e-10);

H       = 2 * speye(S^2);
f       = -2 * muv;
lb      = zeros(S^2,1);
ub      = ones(S^2,1) .* (muv>0);
beq     = [ones(S,1);LCA2(1:end-1)];
A1      = kron(eye(S),ones(1,S));
A2      = kron(LCB2',eye(S));
Aeq     = sparse([A1;A2(1:end-1,:)]);
[x,outout] = quadprog(H,f,[],[],Aeq,beq,lb,ub,[],options);

disp(Aeq*x)
scatter(muv,x)
disp(corr(muv,x))

scatter(muv,x)
hold on
scatter(muv,muv)
scatter(muv,lb)
scatter(muv,ub)
hold off

% Now write output to processed data excel file

nem = reshape(x,S,S)';
scatter(mu(:),nem(:))

xlswrite('ProcessedData.xlsx',nem,'MUFIXED','D2:ABY751')

diff = LCA2 - nem'*LCB2;
disp(sum(abs(diff)))
L1999re = reshape(LCB2,snum,rnum)';

xlswrite('ProcessedData.xlsx',L1999re,'L1999MIX','B2:P51')

%% Section 3: Make migration matrices compatible with levels across years
% without migration across states (only across sectors)

clear all
clc

% First import labor in 2000 from the census

LCA0 = xlsread('InputData.xlsx','L2000CENSUS','B2:P88');
LCA1 = LCA0(1:50,:)';
LCA2 = LCA1(:);

% Second import labor in 2000 from the BLS

LBA0 = xlsread('InputData.xlsx','L2000BLS','B2:P88');
LBA1 = LBA0(1:50,:)';
LBA2 = LBA1(:);

% Third import labor in 1999 from the BLS
% We make it sum to the same as the 2000 BLS vector since our model has no
% population growth at the U.S. aggregate level

LBB0 = xlsread('InputData.xlsx','L1999BLS','B2:P88');
LBB1 = LBB0(1:50,:)';
LBB2 = LBB1(:);
LBB3 = LBB2/sum(LBB2)*sum(LBA2);

% Fourth compute ratios between 1999 and 2000 in BLS data

RAT  = LBB3./LBA2;

% Sets NANs to one, these are only the sectors that have zero employment in
% 2000 BLS data but they also all have zero employment in 1999 BLS data
% so this does not matter. They do not have zero employment in census data

RAT(isnan(RAT))=1;

% Then winsorize 2.5% of values on both sides

LB = prctile(RAT,2.5);
UB = prctile(RAT,97.5);
RAT(RAT<LB) = LB;
RAT(RAT>UB) = UB;

% Then multiply RAT by the 2000 census labor vector and renormalize so that
% they have the same sum state by state (because there is no migration)

LCB1 = RAT .* LCA2;
LCB2 = reshape(LCB1,size(LCA1,1),size(LCA1,2));
LCBs = LCB2./repmat(sum(LCB2),size(LCB2,1),1);
LCB3 = LCBs.*repmat(sum(LCA1),size(LCB2,1),1);
LCB4 = LCB3(:);

% 1999 constructed labor matrix for exporting

L1999cons(1:50,:) = LCB3';
L1999cons(51:87,:) = LBB0(51:end,:);

% Now compute migrations 

migra = (LCA2./LCB4-1)*100;

% Now do the procedure to make the initial mu compatible with
% the "census" labor vectors in 1999 and 2000

mu      = xlsread('InputData.xlsx','MURAWNM','D2:ABY751');
S       = size(LCA1,1);
rnum    = size(LCA1,2);
nee     = zeros(size(mu));
options = optimoptions('quadprog','Display','iter','TolCon',1e-8,'TolFun',1e-8,'TolX',1e-8);

for i=1:rnum
    muR = mu((i-1)*S+1:i*S,(i-1)*S+1:i*S);
    LpA = LCA2((i-1)*S+1:i*S,1);
    LpB = LCB4((i-1)*S+1:i*S,1);
    mut = muR';
    muv = mut(:);
    
    H       = 2 * eye(S^2);
    f       = -2 * muv;
    lb      = zeros(S^2,1);
    ub      = ones(S^2,1);
    beq     = [ones(S,1);LpA(1:end-1)];
    A1      = kron(eye(S),ones(1,S));
    A2      = kron(LpB',eye(S));
    Aeq     = [A1;A2(1:end-1,:)];
    [x,outout] = quadprog(H,f,[],[],Aeq,beq,lb,ub,[],options);
    
    nem = reshape(x,S,S)';
    nee((i-1)*S+1:i*S,(i-1)*S+1:i*S) = nem;

    scatter(muR(:),nem(:))
    pause(1)
end

scatter(mu(:),nee(:))
corr(mu(:),nee(:))

xlswrite('ProcessedData.xlsx',nee,'MUFIXEDNM','D2:ABY751')
